Packages Used

# global default settings for chunks
knitr::opts_chunk$set( eval = TRUE, warning = FALSE, message = FALSE,
                      fig.dim = c(6, 3)
                      )

# loaded packages; placed here to be able to load global settings
#install.packages(c("kableExtra", "patchwork","DescTools","tidyverse","tseries","DataCombine","forecast","plotly","ggpubr","gridExtra","pastecs"))
#tinytex::install_tinytex()


Packages <- c("tidyverse", "arsenal", "readxl", 
              "patchwork", "GGally", "ghibli","plotly","ggpubr","gridExtra",
              "pastecs","forecast","DataCombine","tseries","kableExtra","viridis")
invisible(lapply(Packages, library, character.only = TRUE))


# theme global setting for ggplot
theme_set(theme_minimal() + 
            theme(legend.position = "bottom") +
            theme(plot.title = element_text(hjust = 0.5, size = 12),
                  plot.subtitle = element_text(hjust = 0.5, size = 8))
          )

Skin Bleaching Time Series Analysis

Distribution

This dataset contains information for years 2004 - 2019, however only 2008 onward will be analyzed.

#Here I am loading in the data.

skbt<- read.csv("./data/GtrendData/bleachingTime08_18.csv", header = T, sep = ",",stringsAsFactors = F, col.names = c("time","rsv"))

#deleting first row that contains duplicate row name

skbt<- skbt[-1,] %>% 

#formating rows to correct class and separating time varaiable to year and month 
 mutate(rsv = as.numeric(rsv)) %>% 
  rename(
    date = time) %>% 
    separate(date, into = c("year","month"), sep = "\\-" ) %>% 
  mutate(
    month = as.numeric(month),
    month_name = month.abb[month],
    year_f = as.factor(year),
    month_name = as.factor(month_name)) %>%
  arrange(year)

Relative search interest is right skewed. Given that it is already on a relative scale it may be best leave it untransfromed for preliminary analysis.

skbt %>%
  
  ggplot(aes(x = rsv))+
  geom_histogram()+
  labs(title = "Distribution of Relative Search Volume between 2008 - 2018")

skbt %>% 
  pivot_wider(
    names_from = month_name,
    values_from  = rsv
  ) %>%
    as_tibble() 

We are interested in understanding how the data changes overtime. Here we see the distribution of RSV by month. Very clearly we observe that their may be some seasonality where the highest RSV in every year may be in the summer months of June and July.

skbt  %>% 
   ggplot(aes(x = month_name, y = rsv))+
   geom_violin()+
   ggtitle("Distibution of Relative Search Volume By Month for years 2008 - 2018")

Plotting the temporal trend

The temporal trend is both represented by year and month where the colors on both graphs correspond with the same year and month that the observation occured. Ex: The peak in 2009 from the trend plot is directly associated with the peak in month July from the seasonal plot.

b0<- skbt  %>%
  mutate(
         month_name  = forcats::fct_relevel(month_name, "Jan", "Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),
         month_cont = c(1:132)) %>% 
  ggplot(aes(x = month_cont, y = rsv, col = year_f))+geom_line()+
  scale_fill_viridis_d(option = "A", aesthetics = "col", name = "Year Color")+theme(legend.position = "bottom", axis.text.x = element_blank())+
  labs(
    title = "SKin Bleaching Trend from 2004 To 2018", 
    x = "Year", 
    y = "Relative Search Volume"
    ) 

b1<- skbt %>% 
  ggplot(aes(x=month_name, y= rsv, col = year_f, group = year
             ))+geom_line()+
  scale_fill_viridis_d(option = "A", aesthetics = "col", name = "Year Color")+
  labs(title = "Seasonal Plot of Relative Search Volume for years 2008 - 2018", x = " Month")+theme(legend.position = "bottom")

b0 + plot_layout(ncol = 2) +b1 

In trying to get an idea the percent change by month however I am having issues with the code.

skbt  %>% 
  group_by(year_f) %>% 
  PercChange( Var = "rsv", slideBy = 1) %>% 
  summary() %>% 
  knitr::kable()

Here we observe the decomposed form of the temporal data. There seems to be an increasing trend and seasonal pattern.

skbt %>% 
  pull(rsv) %>% 
  ts(frequency = 12) %>% 
  stl( s.window = "periodic") %>% 
plot()

To statistically assess the data for a trend and seasonality we untilized the autocorellation function and produced a corelogram. In this plot we obaserve a positive test for a trend and seasonality being yearly. In addition the spearman’s rank correlation suggest a evidence of a trend p_value < 0.0001

skbt %>%
  pull(rsv) %>% 
  ggAcf()

 skbt %>%
   pull(rsv) %>% 
  ts() %>%  
  pastecs::trend.test(R = 1) %>% 
   broom::tidy() %>% 
   select(method, estimate, p.value) %>% 
   rename("r" = estimate) %>% 
   knitr::kable()
method r p.value
Spearman’s rank correlation rho 0.4240577 4e-07

—–This seasonal plot is still underconstruction—-

 skbt %>% 
  pull(rsv) %>% 
  na.omit() %>% 
  ts(start = 2008, deltat = 1/12, 
   #  name = "year"
     ) %>% 
      stl( s.window = "periodic") %>% 
  seasadj() %>% 
  ggsubseriesplot(main = "Skin Bleaching")

Skin Bleaching Regional Analysis

#Loading in the data
skbr <- read.csv("./data/GtrendData/bleachingRegion08_18.csv", header = T, sep = ",",stringsAsFactors = F, col.names = c("region","rsv"))

skbr<- skbr[-1,] %>% 
  
  #removing duplicated column names
 mutate(rsv = as.numeric(rsv)) %>% 
  rename(state = region)

Distribution

The regional distribution is very odd.

skbr %>% 
  mutate(rsv = recode(rsv, .missing = 0)) %>% 
  ggplot(aes(x = rsv))+geom_histogram(bins = 30 )+labs(title = "Distribution of Regional Relative Search Intersts for year 2008 - 2018")

Choropleth map

# used this data set to obtain state codes used in plotly to map according to state
codes <- read_csv("https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv") %>% 
  select(state, code)

# give state boundaries a white border
l <- list(color = toRGB("white"), width = 2)

# specify some map projection/options
g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showlakes = TRUE,
  lakecolor = toRGB('white')
)

#merging both the codes and rsv together within ploty commands using left_join.
p <- plot_geo(
      #data set
      left_join(codes, skbr, by = "state"), 
      #map
      locationmode = 'USA-states') %>%
    add_trace(
      z = ~rsv, 
      #text = ~hover, 
      locations = ~code,
      color = ~rsv, colors = 'Purples'
  ) %>%
    colorbar(title = "Volume In Percentage") %>%
    layout(
      title = 'Relative Search Volume of <br> Skin Bleaching in The US 2008 - 2018',
      geo = g
  )

p